In [1]:
# Import all the necessary modules
import os
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,optimizer=None,device=cpu,floatX=float32"
import sys
sys.path.insert(0,'..')
import numpy as np
import theano
import theano.tensor as T
import lasagne
from confusionmatrix import ConfusionMatrix
from utils import iterate_minibatches
import matplotlib.pyplot as plt
import time
import itertools
%matplotlib inline
The first thing we have to do is define the network architecture. Here we are going to use an input layer, two parallel convolutional layers with different filter sizes whose outputs are concatenated, a second convolutional layer followed by max pooling, a dense layer, and a softmax output layer with dropout. These are the steps that we are going to follow:
1.- Specify the hyperparameters of the network:
In [2]:
batch_size = 128   # sequences per minibatch
seq_len = 400      # length of each protein sequence
n_feat = 20        # input features per sequence position
n_hid = 30         # units in the dense layer
n_class = 10       # output classes (subcellular locations)
lr = 0.0025        # learning rate
n_filt = 10        # number of convolutional filters
drop_prob = 0.5    # dropout probability
2.- Define the input variables to our network:
In [3]:
# We use ftensor3 because the protein data is a 3D tensor of float32 values
input_var = T.ftensor3('inputs')
# ivector because the labels are a one-dimensional vector of integers
target_var = T.ivector('targets')
# Dummy data to check the size of the layers during the building of the network
X = np.random.randint(0,10,size=(batch_size,seq_len,n_feat)).astype('float32')
Xmask = np.ones((batch_size,seq_len)).astype('float32')
3.- Define the layers of the network:
In [4]:
# Input layer, holds the shape of the data
l_in = lasagne.layers.InputLayer(shape=(batch_size, seq_len, n_feat), input_var=input_var, name='Input')
print('Input layer: {}'.format(
    lasagne.layers.get_output(l_in, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Shuffle the dimensions to (batch, features, sequence) so the data can be read by the 1D convolutional layers
l_shu = lasagne.layers.DimshuffleLayer(l_in, (0, 2, 1))
print('DimshuffleLayer layer: {}'.format(
    lasagne.layers.get_output(l_shu, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Two parallel convolutional layers with different filter sizes
l_conv_a = lasagne.layers.Conv1DLayer(l_shu, num_filters=n_filt, pad='same', stride=1,
                                      filter_size=3, nonlinearity=lasagne.nonlinearities.rectify)
print('Convolutional layer size 3: {}'.format(
    lasagne.layers.get_output(l_conv_a, inputs={l_in: input_var}).eval({input_var: X}).shape))
l_conv_b = lasagne.layers.Conv1DLayer(l_shu, num_filters=n_filt, pad='same', stride=1,
                                      filter_size=5, nonlinearity=lasagne.nonlinearities.rectify)
print('Convolutional layer size 5: {}'.format(
    lasagne.layers.get_output(l_conv_b, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Their outputs are concatenated along the filter axis
l_conc = lasagne.layers.ConcatLayer([l_conv_a, l_conv_b], axis=1)
print('Concatenated convolutional layers: {}'.format(
    lasagne.layers.get_output(l_conc, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Second convolutional layer
l_conv_final = lasagne.layers.Conv1DLayer(l_conc, num_filters=n_filt*2, pad='same',
                                          stride=1, filter_size=3,
                                          nonlinearity=lasagne.nonlinearities.rectify)
print('Final convolutional layer: {}'.format(
    lasagne.layers.get_output(l_conv_final, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Max pooling is performed to downsample the sequence dimension and reduce its dimensionality
final_max_pool = lasagne.layers.MaxPool1DLayer(l_conv_final, 5)
print('Max pool layer: {}'.format(
    lasagne.layers.get_output(final_max_pool, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Dense layer with ReLU activation function
l_dense = lasagne.layers.DenseLayer(final_max_pool, num_units=n_hid, name="Dense",
                                    nonlinearity=lasagne.nonlinearities.rectify)
print('Dense layer: {}'.format(
    lasagne.layers.get_output(l_dense, inputs={l_in: input_var}).eval({input_var: X}).shape))
# Output layer with a softmax activation function, preceded by dropout
l_out = lasagne.layers.DenseLayer(lasagne.layers.dropout(l_dense, p=drop_prob), num_units=n_class, name="Softmax",
                                  nonlinearity=lasagne.nonlinearities.softmax)
print('Output layer: {}'.format(
    lasagne.layers.get_output(l_out, inputs={l_in: input_var}).eval({input_var: X}).shape))
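Before moving on, it can be useful to sanity-check the size of the model. A minimal sketch, using Lasagne's count_params to count the trainable parameters below l_out (the exact number depends on the hyperparameters chosen above):

# Count the trainable parameters of the network defined above
print('Trainable parameters: {}'.format(lasagne.layers.count_params(l_out, trainable=True)))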
4.- Calculate the prediction and network loss for the training set and update the network weights:
In [5]:
# Get the training-time output; deterministic=False keeps dropout active during training
prediction = lasagne.layers.get_output(l_out, inputs={l_in: input_var}, deterministic=False)
# Calculate the categorical cross entropy between the labels and the prediction
t_loss = T.nnet.categorical_crossentropy(prediction, target_var)
# Training loss
loss = T.mean(t_loss)
# Parameters
params = lasagne.layers.get_all_params([l_out], trainable=True)
# Get the network gradients and clip their total norm to a maximum of 3
all_grads = lasagne.updates.total_norm_constraint(T.grad(loss, params),3)
# Update parameters using ADAM
updates = lasagne.updates.adam(all_grads, params, learning_rate=lr)
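If you want to see how strongly the gradients are being clipped, total_norm_constraint can also return the global gradient norm before clipping (return_norm=True). A minimal sketch that could replace the gradient line above if you wanted to log that norm during training (the extra output would then also have to be added to train_fn below):

# Also return the global gradient norm before clipping, e.g. for logging
all_grads, total_grad_norm = lasagne.updates.total_norm_constraint(T.grad(loss, params), 3, return_norm=True)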
5.- Calculate the prediction and network loss for the validation set:
In [6]:
# Get the validation-time output; deterministic=True disables dropout for evaluation
val_prediction = lasagne.layers.get_output(l_out, inputs={l_in: input_var}, deterministic=True)
# Calculate the categorical cross entropy between the labels and the prediction
t_val_loss = lasagne.objectives.categorical_crossentropy(val_prediction, target_var)
# Validation loss
val_loss = T.mean(t_val_loss)
6.- Build theano functions:
In [7]:
# Build functions
train_fn = theano.function([input_var, target_var], [loss, prediction], updates=updates)
val_fn = theano.function([input_var, target_var], [val_loss, val_prediction])
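For later inference on unlabelled sequences it can also be convenient to compile a prediction-only function that does not require targets. A minimal sketch reusing the deterministic output defined above:

# Prediction-only function: returns the most likely class for each sequence in the batch
predict_fn = theano.function([input_var], T.argmax(val_prediction, axis=-1))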
In [8]:
# Load the encoded protein sequences, labels and masks
# The masks are not needed for the FFN or CNN models
train = np.load('data/reduced_train.npz')
X_train = train['X_train']
y_train = train['y_train']
mask_train = train['mask_train']
print(X_train.shape)
In [9]:
validation = np.load('data/reduced_val.npz')
X_val = validation['X_val']
y_val = validation['y_val']
mask_val = validation['mask_val']
print(X_val.shape)
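As a quick sanity check before training, the loaded arrays should match the dimensions the input layer expects. A minimal sketch, assuming the sequences were encoded with seq_len positions and n_feat features each:

# Check that the data matches the shape the network was built for
assert X_train.shape[1:] == (seq_len, n_feat)
assert X_val.shape[1:] == (seq_len, n_feat)
print('Training sequences: {}, validation sequences: {}'.format(len(X_train), len(X_val)))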
In [10]:
# Number of epochs
num_epochs = 80
# Lists to save loss and accuracy of each epoch
loss_training = []
loss_validation = []
acc_training = []
acc_validation = []
start_time = time.time()
min_val_loss = float("inf")
# Start training
for epoch in range(num_epochs):
    # Full pass training set
    train_err = 0
    train_batches = 0
    confusion_train = ConfusionMatrix(n_class)
    # Generate minibatches and train on each one of them
    for batch in iterate_minibatches(X_train.astype(np.float32), y_train.astype(np.int32),
                                     mask_train.astype(np.float32), batch_size, shuffle=True, sort_len=False):
        # Inputs to the network
        inputs, targets, in_masks = batch
        # Calculate loss and prediction
        tr_err, predict = train_fn(inputs, targets)
        train_err += tr_err
        train_batches += 1
        # Get the predicted class, the one with the maximum likelihood
        preds = np.argmax(predict, axis=-1)
        confusion_train.batch_add(targets, preds)
    # Average loss and accuracy
    train_loss = train_err / train_batches
    train_accuracy = confusion_train.accuracy()
    cf_train = confusion_train.ret_mat()
    val_err = 0
    val_batches = 0
    confusion_valid = ConfusionMatrix(n_class)
    # Generate minibatches and validate on each one of them, same procedure as before
    for batch in iterate_minibatches(X_val.astype(np.float32), y_val.astype(np.int32),
                                     mask_val.astype(np.float32), batch_size, shuffle=True, sort_len=False):
        inputs, targets, in_masks = batch
        err, predict_val = val_fn(inputs, targets)
        val_err += err
        val_batches += 1
        preds = np.argmax(predict_val, axis=-1)
        confusion_valid.batch_add(targets, preds)
    val_loss = val_err / val_batches
    val_accuracy = confusion_valid.accuracy()
    cf_val = confusion_valid.ret_mat()
    loss_training.append(train_loss)
    loss_validation.append(val_loss)
    acc_training.append(train_accuracy)
    acc_validation.append(val_accuracy)
    # Save the model parameters at the epoch with the lowest validation loss
    if min_val_loss > val_loss:
        min_val_loss = val_loss
        np.savez('params/CNN_params.npz', *lasagne.layers.get_all_param_values(l_out))
    print("Epoch {} of {} time elapsed {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
    print(" training loss:\t\t{:.6f}".format(train_loss))
    print(" validation loss:\t\t{:.6f}".format(val_loss))
    print(" training accuracy:\t\t{:.2f} %".format(train_accuracy * 100))
    print(" validation accuracy:\t\t{:.2f} %".format(val_accuracy * 100))
In [11]:
print("Minimum validation loss: {:.6f}".format(min_val_loss))
In [12]:
x_axis = range(num_epochs)
plt.figure(figsize=(8,6))
plt.plot(x_axis,loss_training)
plt.plot(x_axis,loss_validation)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(('Training','Validation'));
In [13]:
plt.figure(figsize=(8,6))
plt.plot(x_axis,acc_training)
plt.plot(x_axis,acc_validation)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(('Training','Validation'));
In [14]:
# Plot confusion matrix
# Code based on http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
plt.figure(figsize=(8,8))
cmap=plt.cm.Blues
plt.imshow(cf_val, interpolation='nearest', cmap=cmap)
plt.title('Confusion matrix validation set')
plt.colorbar()
tick_marks = np.arange(n_class)
classes = ['Nucleus','Cytoplasm','Extracellular','Mitochondrion','Cell membrane','ER',
'Chloroplast','Golgi apparatus','Lysosome','Vacuole']
plt.xticks(tick_marks, classes, rotation=60)
plt.yticks(tick_marks, classes)
thresh = cf_val.max() / 2.
for i, j in itertools.product(range(cf_val.shape[0]), range(cf_val.shape[1])):
plt.text(j, i, cf_val[i, j],
horizontalalignment="center",
color="white" if cf_val[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True location')
plt.xlabel('Predicted location');
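Beyond the overall accuracy, the confusion matrix also gives a per-class accuracy (recall). A minimal sketch, assuming the rows of cf_val correspond to the true locations:

# Per-class accuracy (recall) on the validation set
per_class_acc = np.diag(cf_val) / cf_val.sum(axis=1).astype(np.float64)
for name, acc in zip(classes, per_class_acc):
    print('{}: {:.2f} %'.format(name, acc * 100))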
In [ ]: